library(readr)
library(tidyverse)
library(readxl)
library(here)
# Print the directory that here() treats as the project root, to confirm that
# the relative data paths used below will resolve as expected.
here::here()
[1] "/Users/greganderson/codeclan_work/data_cleaning_project/task_4"
# Load the three yearly survey workbooks. Each path is built relative to the
# project root reported by here() and then handed to read_xlsx().
candy_2015 <- here("raw_data/boing-boing-candy-2015.xlsx") %>% read_xlsx()
candy_2016 <- here("raw_data/boing-boing-candy-2016.xlsx") %>% read_xlsx()
candy_2017 <- here("raw_data/boing-boing-candy-2017.xlsx") %>% read_xlsx()
New names:
* `` -> ...114
# Compare the size of each year's raw data; dim() reports rows then columns.
dim(candy_2015)
[1] 5630 124
dim(candy_2016)
[1] 1259 123
dim(candy_2017)
[1] 2460 120
# List the raw 2015 column headers (survey questions and bracketed candy items).
names(candy_2015)
[1] "Timestamp"
[2] "How old are you?"
[3] "Are you going actually going trick or treating yourself?"
[4] "[Butterfinger]"
[5] "[100 Grand Bar]"
[6] "[Anonymous brown globs that come in black and orange wrappers]"
[7] "[Any full-sized candy bar]"
[8] "[Black Jacks]"
[9] "[Bonkers]"
[10] "[Bottle Caps]"
[11] "[Box’o’ Raisins]"
[12] "[Brach products (not including candy corn)]"
[13] "[Bubble Gum]"
[14] "[Cadbury Creme Eggs]"
[15] "[Candy Corn]"
[16] "[Vials of pure high fructose corn syrup, for main-lining into your vein]"
[17] "[Candy that is clearly just the stuff given out for free at restaurants]"
[18] "[Cash, or other forms of legal tender]"
[19] "[Chiclets]"
[20] "[Caramellos]"
[21] "[Snickers]"
[22] "[Dark Chocolate Hershey]"
[23] "[Dental paraphenalia]"
[24] "[Dots]"
[25] "[Fuzzy Peaches]"
[26] "[Generic Brand Acetaminophen]"
[27] "[Glow sticks]"
[28] "[Broken glow stick]"
[29] "[Goo Goo Clusters]"
[30] "[Good N' Plenty]"
[31] "[Gum from baseball cards]"
[32] "[Gummy Bears straight up]"
[33] "[Creepy Religious comics/Chick Tracts]"
[34] "[Healthy Fruit]"
[35] "[Heath Bar]"
[36] "[Hershey’s Kissables]"
[37] "[Hershey’s Milk Chocolate]"
[38] "[Hugs (actual physical hugs)]"
[39] "[Jolly Rancher (bad flavor)]"
[40] "[Jolly Ranchers (good flavor)]"
[41] "[Kale smoothie]"
[42] "[Kinder Happy Hippo]"
[43] "[Kit Kat]"
[44] "[Hard Candy]"
[45] "[Lapel Pins]"
[46] "[LemonHeads]"
[47] "[Licorice]"
[48] "[Licorice (not black)]"
[49] "[Lindt Truffle]"
[50] "[Lollipops]"
[51] "[Mars]"
[52] "[Mary Janes]"
[53] "[Maynards]"
[54] "[Milk Duds]"
[55] "[LaffyTaffy]"
[56] "[Minibags of chips]"
[57] "[JoyJoy (Mit Iodine)]"
[58] "[Reggie Jackson Bar]"
[59] "[Pixy Stix]"
[60] "[Nerds]"
[61] "[Nestle Crunch]"
[62] "[Now'n'Laters]"
[63] "[Pencils]"
[64] "[Milky Way]"
[65] "[Reese’s Peanut Butter Cups]"
[66] "[Tolberone something or other]"
[67] "[Runts]"
[68] "[Junior Mints]"
[69] "[Senior Mints]"
[70] "[Mint Kisses]"
[71] "[Mint Juleps]"
[72] "[Mint Leaves]"
[73] "[Peanut M&M’s]"
[74] "[Regular M&Ms]"
[75] "[Mint M&Ms]"
[76] "[Ribbon candy]"
[77] "[Rolos]"
[78] "[Skittles]"
[79] "[Smarties (American)]"
[80] "[Smarties (Commonwealth)]"
[81] "[Chick-o-Sticks (we don’t know what that is)]"
[82] "[Spotted Dick]"
[83] "[Starburst]"
[84] "[Swedish Fish]"
[85] "[Sweetums]"
[86] "[Those odd marshmallow circus peanut things]"
[87] "[Three Musketeers]"
[88] "[Peterson Brand Sidewalk Chalk]"
[89] "[Peanut Butter Bars]"
[90] "[Peanut Butter Jars]"
[91] "[Trail Mix]"
[92] "[Twix]"
[93] "[Vicodin]"
[94] "[White Bread]"
[95] "[Whole Wheat anything]"
[96] "[York Peppermint Patties]"
[97] "Please leave any remarks or comments regarding your choices."
[98] "Please list any items not included above that give you JOY."
[99] "Please list any items not included above that give you DESPAIR."
[100] "Guess the number of mints in my hand."
[101] "Betty or Veronica?"
[102] "Check all that apply: \"I cried tears of sadness at the end of ____________\""
[103] "\"That dress* that went viral early this year - when I first saw it, it was ________\""
[104] "Fill in the blank: \"Taylor Swift is a force for ___________\""
[105] "What is your favourite font?"
[106] "If you squint really hard, the words \"Intelligent Design\" would look like."
[107] "Fill in the blank: \"Imitation is a form of ____________\""
[108] "Please estimate the degree(s) of separation you have from the following celebrities [JK Rowling]"
[109] "Please estimate the degree(s) of separation you have from the following celebrities [JJ Abrams]"
[110] "Please estimate the degree(s) of separation you have from the following celebrities [Beyoncé]"
[111] "Please estimate the degree(s) of separation you have from the following celebrities [Bieber]"
[112] "Please estimate the degree(s) of separation you have from the following celebrities [Kevin Bacon]"
[113] "Please estimate the degree(s) of separation you have from the following celebrities [Francis Bacon (1561 - 1626)]"
[114] "[Sea-salt flavored stuff, probably chocolate, since this is the \"it\" flavor of the year]"
[115] "[Necco Wafers]"
[116] "Which day do you prefer, Friday or Sunday?"
[117] "Please estimate the degrees of separation you have from the following folks [Bruce Lee]"
[118] "Please estimate the degrees of separation you have from the following folks [JK Rowling]"
[119] "Please estimate the degrees of separation you have from the following folks [Malala Yousafzai]"
[120] "Please estimate the degrees of separation you have from the following folks [Thom Yorke]"
[121] "Please estimate the degrees of separation you have from the following folks [JJ Abrams]"
[122] "Please estimate the degrees of separation you have from the following folks [Hillary Clinton]"
[123] "Please estimate the degrees of separation you have from the following folks [Donald Trump]"
[124] "Please estimate the degrees of separation you have from the following folks [Beyoncé Knowles]"
# List the raw 2016 column headers (survey questions and bracketed candy items).
names(candy_2016)
[1] "Timestamp"
[2] "Are you going actually going trick or treating yourself?"
[3] "Your gender:"
[4] "How old are you?"
[5] "Which country do you live in?"
[6] "Which state, province, county do you live in?"
[7] "[100 Grand Bar]"
[8] "[Anonymous brown globs that come in black and orange wrappers]"
[9] "[Any full-sized candy bar]"
[10] "[Black Jacks]"
[11] "[Bonkers (the candy)]"
[12] "[Bonkers (the board game)]"
[13] "[Bottle Caps]"
[14] "[Box'o'Raisins]"
[15] "[Broken glow stick]"
[16] "[Butterfinger]"
[17] "[Cadbury Creme Eggs]"
[18] "[Candy Corn]"
[19] "[Candy that is clearly just the stuff given out for free at restaurants]"
[20] "[Caramellos]"
[21] "[Cash, or other forms of legal tender]"
[22] "[Chardonnay]"
[23] "[Chick-o-Sticks (we don’t know what that is)]"
[24] "[Chiclets]"
[25] "[Coffee Crisp]"
[26] "[Creepy Religious comics/Chick Tracts]"
[27] "[Dental paraphenalia]"
[28] "[Dots]"
[29] "[Dove Bars]"
[30] "[Fuzzy Peaches]"
[31] "[Generic Brand Acetaminophen]"
[32] "[Glow sticks]"
[33] "[Goo Goo Clusters]"
[34] "[Good N' Plenty]"
[35] "[Gum from baseball cards]"
[36] "[Gummy Bears straight up]"
[37] "[Hard Candy]"
[38] "[Healthy Fruit]"
[39] "[Heath Bar]"
[40] "[Hershey's Dark Chocolate]"
[41] "[Hershey’s Milk Chocolate]"
[42] "[Hershey's Kisses]"
[43] "[Hugs (actual physical hugs)]"
[44] "[Jolly Rancher (bad flavor)]"
[45] "[Jolly Ranchers (good flavor)]"
[46] "[JoyJoy (Mit Iodine!)]"
[47] "[Junior Mints]"
[48] "[Senior Mints]"
[49] "[Kale smoothie]"
[50] "[Kinder Happy Hippo]"
[51] "[Kit Kat]"
[52] "[LaffyTaffy]"
[53] "[LemonHeads]"
[54] "[Licorice (not black)]"
[55] "[Licorice (yes black)]"
[56] "[Lindt Truffle]"
[57] "[Lollipops]"
[58] "[Mars]"
[59] "[Mary Janes]"
[60] "[Maynards]"
[61] "[Mike and Ike]"
[62] "[Milk Duds]"
[63] "[Milky Way]"
[64] "[Regular M&Ms]"
[65] "[Peanut M&M’s]"
[66] "[Blue M&M's]"
[67] "[Red M&M's]"
[68] "[Third Party M&M's]"
[69] "[Minibags of chips]"
[70] "[Mint Kisses]"
[71] "[Mint Juleps]"
[72] "[Mr. Goodbar]"
[73] "[Necco Wafers]"
[74] "[Nerds]"
[75] "[Nestle Crunch]"
[76] "[Now'n'Laters]"
[77] "[Peeps]"
[78] "[Pencils]"
[79] "[Person of Interest Season 3 DVD Box Set (not including Disc 4 with hilarious outtakes)]"
[80] "[Pixy Stix]"
[81] "[Reese’s Peanut Butter Cups]"
[82] "[Reese's Pieces]"
[83] "[Reggie Jackson Bar]"
[84] "[Rolos]"
[85] "[Skittles]"
[86] "[Smarties (American)]"
[87] "[Smarties (Commonwealth)]"
[88] "[Snickers]"
[89] "[Sourpatch Kids (i.e. abominations of nature)]"
[90] "[Spotted Dick]"
[91] "[Starburst]"
[92] "[Sweet Tarts]"
[93] "[Swedish Fish]"
[94] "[Sweetums (a friend to diabetes)]"
[95] "[Tic Tacs]"
[96] "[Those odd marshmallow circus peanut things]"
[97] "[Three Musketeers]"
[98] "[Tolberone something or other]"
[99] "[Trail Mix]"
[100] "[Twix]"
[101] "[Vials of pure high fructose corn syrup, for main-lining into your vein]"
[102] "[Vicodin]"
[103] "[Whatchamacallit Bars]"
[104] "[White Bread]"
[105] "[Whole Wheat anything]"
[106] "[York Peppermint Patties]"
[107] "Please list any items not included above that give you JOY."
[108] "Please list any items not included above that give you DESPAIR."
[109] "Please leave any witty, snarky or thoughtful remarks or comments regarding your choices."
[110] "Guess the number of mints in my hand."
[111] "Betty or Veronica?"
[112] "\"That dress* that went viral a few years back - when I first saw it, it was ________\""
[113] "What is your favourite font?"
[114] "Please estimate the degree(s) of separation you have from the following celebrities [JK Rowling]"
[115] "Please estimate the degree(s) of separation you have from the following celebrities [JJ Abrams]"
[116] "Please estimate the degree(s) of separation you have from the following celebrities [Beyoncé]"
[117] "Please estimate the degree(s) of separation you have from the following celebrities [Bieber]"
[118] "Please estimate the degree(s) of separation you have from the following celebrities [Kevin Bacon]"
[119] "Please estimate the degree(s) of separation you have from the following celebrities [Francis Bacon (1561 - 1626)]"
[120] "Which day do you prefer, Friday or Sunday?"
[121] "Do you eat apples the correct way, East to West (side to side) or do you eat them like a freak of nature, South to North (bottom to top)?"
[122] "When you see the above image of the 4 different websites, which one would you most likely check out (please be honest)."
[123] "[York Peppermint Patties] Ignore"
# List the raw 2017 column headers (numbered questions; candy items are
# prefixed with "Q6 | ").
names(candy_2017)
[1] "Internal ID"
[2] "Q1: GOING OUT?"
[3] "Q2: GENDER"
[4] "Q3: AGE"
[5] "Q4: COUNTRY"
[6] "Q5: STATE, PROVINCE, COUNTY, ETC"
[7] "Q6 | 100 Grand Bar"
[8] "Q6 | Anonymous brown globs that come in black and orange wrappers\t(a.k.a. Mary Janes)"
[9] "Q6 | Any full-sized candy bar"
[10] "Q6 | Black Jacks"
[11] "Q6 | Bonkers (the candy)"
[12] "Q6 | Bonkers (the board game)"
[13] "Q6 | Bottle Caps"
[14] "Q6 | Box'o'Raisins"
[15] "Q6 | Broken glow stick"
[16] "Q6 | Butterfinger"
[17] "Q6 | Cadbury Creme Eggs"
[18] "Q6 | Candy Corn"
[19] "Q6 | Candy that is clearly just the stuff given out for free at restaurants"
[20] "Q6 | Caramellos"
[21] "Q6 | Cash, or other forms of legal tender"
[22] "Q6 | Chardonnay"
[23] "Q6 | Chick-o-Sticks (we don’t know what that is)"
[24] "Q6 | Chiclets"
[25] "Q6 | Coffee Crisp"
[26] "Q6 | Creepy Religious comics/Chick Tracts"
[27] "Q6 | Dental paraphenalia"
[28] "Q6 | Dots"
[29] "Q6 | Dove Bars"
[30] "Q6 | Fuzzy Peaches"
[31] "Q6 | Generic Brand Acetaminophen"
[32] "Q6 | Glow sticks"
[33] "Q6 | Goo Goo Clusters"
[34] "Q6 | Good N' Plenty"
[35] "Q6 | Gum from baseball cards"
[36] "Q6 | Gummy Bears straight up"
[37] "Q6 | Hard Candy"
[38] "Q6 | Healthy Fruit"
[39] "Q6 | Heath Bar"
[40] "Q6 | Hershey's Dark Chocolate"
[41] "Q6 | Hershey’s Milk Chocolate"
[42] "Q6 | Hershey's Kisses"
[43] "Q6 | Hugs (actual physical hugs)"
[44] "Q6 | Jolly Rancher (bad flavor)"
[45] "Q6 | Jolly Ranchers (good flavor)"
[46] "Q6 | JoyJoy (Mit Iodine!)"
[47] "Q6 | Junior Mints"
[48] "Q6 | Senior Mints"
[49] "Q6 | Kale smoothie"
[50] "Q6 | Kinder Happy Hippo"
[51] "Q6 | Kit Kat"
[52] "Q6 | LaffyTaffy"
[53] "Q6 | LemonHeads"
[54] "Q6 | Licorice (not black)"
[55] "Q6 | Licorice (yes black)"
[56] "Q6 | Lindt Truffle"
[57] "Q6 | Lollipops"
[58] "Q6 | Mars"
[59] "Q6 | Maynards"
[60] "Q6 | Mike and Ike"
[61] "Q6 | Milk Duds"
[62] "Q6 | Milky Way"
[63] "Q6 | Regular M&Ms"
[64] "Q6 | Peanut M&M’s"
[65] "Q6 | Blue M&M's"
[66] "Q6 | Red M&M's"
[67] "Q6 | Green Party M&M's"
[68] "Q6 | Independent M&M's"
[69] "Q6 | Abstained from M&M'ing."
[70] "Q6 | Minibags of chips"
[71] "Q6 | Mint Kisses"
[72] "Q6 | Mint Juleps"
[73] "Q6 | Mr. Goodbar"
[74] "Q6 | Necco Wafers"
[75] "Q6 | Nerds"
[76] "Q6 | Nestle Crunch"
[77] "Q6 | Now'n'Laters"
[78] "Q6 | Peeps"
[79] "Q6 | Pencils"
[80] "Q6 | Pixy Stix"
[81] "Q6 | Real Housewives of Orange County Season 9 Blue-Ray"
[82] "Q6 | Reese’s Peanut Butter Cups"
[83] "Q6 | Reese's Pieces"
[84] "Q6 | Reggie Jackson Bar"
[85] "Q6 | Rolos"
[86] "Q6 | Sandwich-sized bags filled with BooBerry Crunch"
[87] "Q6 | Skittles"
[88] "Q6 | Smarties (American)"
[89] "Q6 | Smarties (Commonwealth)"
[90] "Q6 | Snickers"
[91] "Q6 | Sourpatch Kids (i.e. abominations of nature)"
[92] "Q6 | Spotted Dick"
[93] "Q6 | Starburst"
[94] "Q6 | Sweet Tarts"
[95] "Q6 | Swedish Fish"
[96] "Q6 | Sweetums (a friend to diabetes)"
[97] "Q6 | Take 5"
[98] "Q6 | Tic Tacs"
[99] "Q6 | Those odd marshmallow circus peanut things"
[100] "Q6 | Three Musketeers"
[101] "Q6 | Tolberone something or other"
[102] "Q6 | Trail Mix"
[103] "Q6 | Twix"
[104] "Q6 | Vials of pure high fructose corn syrup, for main-lining into your vein"
[105] "Q6 | Vicodin"
[106] "Q6 | Whatchamacallit Bars"
[107] "Q6 | White Bread"
[108] "Q6 | Whole Wheat anything"
[109] "Q6 | York Peppermint Patties"
[110] "Q7: JOY OTHER"
[111] "Q8: DESPAIR OTHER"
[112] "Q9: OTHER COMMENTS"
[113] "Q10: DRESS"
[114] "...114"
[115] "Q11: DAY"
[116] "Q12: MEDIA [Daily Dish]"
[117] "Q12: MEDIA [Science]"
[118] "Q12: MEDIA [ESPN]"
[119] "Q12: MEDIA [Yahoo]"
[120] "Click Coordinates (x, y)"
# Print each raw table to inspect its contents.
candy_2015
candy_2016
candy_2017
NA
# clean names
library(janitor)
# 2015
# Standardise the 2015 column names with janitor, then list the cleaned names.
candy_2015_1 <- candy_2015 %>%
  clean_names()
colnames(candy_2015_1)
[1] "timestamp"
[2] "how_old_are_you"
[3] "are_you_going_actually_going_trick_or_treating_yourself"
[4] "butterfinger"
[5] "x100_grand_bar"
[6] "anonymous_brown_globs_that_come_in_black_and_orange_wrappers"
[7] "any_full_sized_candy_bar"
[8] "black_jacks"
[9] "bonkers"
[10] "bottle_caps"
[11] "box_o_raisins"
[12] "brach_products_not_including_candy_corn"
[13] "bubble_gum"
[14] "cadbury_creme_eggs"
[15] "candy_corn"
[16] "vials_of_pure_high_fructose_corn_syrup_for_main_lining_into_your_vein"
[17] "candy_that_is_clearly_just_the_stuff_given_out_for_free_at_restaurants"
[18] "cash_or_other_forms_of_legal_tender"
[19] "chiclets"
[20] "caramellos"
[21] "snickers"
[22] "dark_chocolate_hershey"
[23] "dental_paraphenalia"
[24] "dots"
[25] "fuzzy_peaches"
[26] "generic_brand_acetaminophen"
[27] "glow_sticks"
[28] "broken_glow_stick"
[29] "goo_goo_clusters"
[30] "good_n_plenty"
[31] "gum_from_baseball_cards"
[32] "gummy_bears_straight_up"
[33] "creepy_religious_comics_chick_tracts"
[34] "healthy_fruit"
[35] "heath_bar"
[36] "hershey_s_kissables"
[37] "hershey_s_milk_chocolate"
[38] "hugs_actual_physical_hugs"
[39] "jolly_rancher_bad_flavor"
[40] "jolly_ranchers_good_flavor"
[41] "kale_smoothie"
[42] "kinder_happy_hippo"
[43] "kit_kat"
[44] "hard_candy"
[45] "lapel_pins"
[46] "lemon_heads"
[47] "licorice"
[48] "licorice_not_black"
[49] "lindt_truffle"
[50] "lollipops"
[51] "mars"
[52] "mary_janes"
[53] "maynards"
[54] "milk_duds"
[55] "laffy_taffy"
[56] "minibags_of_chips"
[57] "joy_joy_mit_iodine"
[58] "reggie_jackson_bar"
[59] "pixy_stix"
[60] "nerds"
[61] "nestle_crunch"
[62] "nown_laters"
[63] "pencils"
[64] "milky_way"
[65] "reese_s_peanut_butter_cups"
[66] "tolberone_something_or_other"
[67] "runts"
[68] "junior_mints"
[69] "senior_mints"
[70] "mint_kisses"
[71] "mint_juleps"
[72] "mint_leaves"
[73] "peanut_m_m_s"
[74] "regular_m_ms"
[75] "mint_m_ms"
[76] "ribbon_candy"
[77] "rolos"
[78] "skittles"
[79] "smarties_american"
[80] "smarties_commonwealth"
[81] "chick_o_sticks_we_don_t_know_what_that_is"
[82] "spotted_dick"
[83] "starburst"
[84] "swedish_fish"
[85] "sweetums"
[86] "those_odd_marshmallow_circus_peanut_things"
[87] "three_musketeers"
[88] "peterson_brand_sidewalk_chalk"
[89] "peanut_butter_bars"
[90] "peanut_butter_jars"
[91] "trail_mix"
[92] "twix"
[93] "vicodin"
[94] "white_bread"
[95] "whole_wheat_anything"
[96] "york_peppermint_patties"
[97] "please_leave_any_remarks_or_comments_regarding_your_choices"
[98] "please_list_any_items_not_included_above_that_give_you_joy"
[99] "please_list_any_items_not_included_above_that_give_you_despair"
[100] "guess_the_number_of_mints_in_my_hand"
[101] "betty_or_veronica"
[102] "check_all_that_apply_i_cried_tears_of_sadness_at_the_end_of"
[103] "that_dress_that_went_viral_early_this_year_when_i_first_saw_it_it_was"
[104] "fill_in_the_blank_taylor_swift_is_a_force_for"
[105] "what_is_your_favourite_font"
[106] "if_you_squint_really_hard_the_words_intelligent_design_would_look_like"
[107] "fill_in_the_blank_imitation_is_a_form_of"
[108] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_jk_rowling"
[109] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_jj_abrams"
[110] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_beyonce"
[111] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_bieber"
[112] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_kevin_bacon"
[113] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_francis_bacon_1561_1626"
[114] "sea_salt_flavored_stuff_probably_chocolate_since_this_is_the_it_flavor_of_the_year"
[115] "necco_wafers"
[116] "which_day_do_you_prefer_friday_or_sunday"
[117] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_bruce_lee"
[118] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_jk_rowling"
[119] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_malala_yousafzai"
[120] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_thom_yorke"
[121] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_jj_abrams"
[122] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_hillary_clinton"
[123] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_donald_trump"
[124] "please_estimate_the_degrees_of_separation_you_have_from_the_following_folks_beyonce_knowles"
# 2016
# Standardise the 2016 column names with janitor, then list the cleaned names.
candy_2016_1 <- candy_2016 %>%
  clean_names()
colnames(candy_2016_1)
[1] "timestamp"
[2] "are_you_going_actually_going_trick_or_treating_yourself"
[3] "your_gender"
[4] "how_old_are_you"
[5] "which_country_do_you_live_in"
[6] "which_state_province_county_do_you_live_in"
[7] "x100_grand_bar"
[8] "anonymous_brown_globs_that_come_in_black_and_orange_wrappers"
[9] "any_full_sized_candy_bar"
[10] "black_jacks"
[11] "bonkers_the_candy"
[12] "bonkers_the_board_game"
[13] "bottle_caps"
[14] "boxo_raisins"
[15] "broken_glow_stick"
[16] "butterfinger"
[17] "cadbury_creme_eggs"
[18] "candy_corn"
[19] "candy_that_is_clearly_just_the_stuff_given_out_for_free_at_restaurants"
[20] "caramellos"
[21] "cash_or_other_forms_of_legal_tender"
[22] "chardonnay"
[23] "chick_o_sticks_we_don_t_know_what_that_is"
[24] "chiclets"
[25] "coffee_crisp"
[26] "creepy_religious_comics_chick_tracts"
[27] "dental_paraphenalia"
[28] "dots"
[29] "dove_bars"
[30] "fuzzy_peaches"
[31] "generic_brand_acetaminophen"
[32] "glow_sticks"
[33] "goo_goo_clusters"
[34] "good_n_plenty"
[35] "gum_from_baseball_cards"
[36] "gummy_bears_straight_up"
[37] "hard_candy"
[38] "healthy_fruit"
[39] "heath_bar"
[40] "hersheys_dark_chocolate"
[41] "hershey_s_milk_chocolate"
[42] "hersheys_kisses"
[43] "hugs_actual_physical_hugs"
[44] "jolly_rancher_bad_flavor"
[45] "jolly_ranchers_good_flavor"
[46] "joy_joy_mit_iodine"
[47] "junior_mints"
[48] "senior_mints"
[49] "kale_smoothie"
[50] "kinder_happy_hippo"
[51] "kit_kat"
[52] "laffy_taffy"
[53] "lemon_heads"
[54] "licorice_not_black"
[55] "licorice_yes_black"
[56] "lindt_truffle"
[57] "lollipops"
[58] "mars"
[59] "mary_janes"
[60] "maynards"
[61] "mike_and_ike"
[62] "milk_duds"
[63] "milky_way"
[64] "regular_m_ms"
[65] "peanut_m_m_s"
[66] "blue_m_ms"
[67] "red_m_ms"
[68] "third_party_m_ms"
[69] "minibags_of_chips"
[70] "mint_kisses"
[71] "mint_juleps"
[72] "mr_goodbar"
[73] "necco_wafers"
[74] "nerds"
[75] "nestle_crunch"
[76] "nown_laters"
[77] "peeps"
[78] "pencils"
[79] "person_of_interest_season_3_dvd_box_set_not_including_disc_4_with_hilarious_outtakes"
[80] "pixy_stix"
[81] "reese_s_peanut_butter_cups"
[82] "reeses_pieces"
[83] "reggie_jackson_bar"
[84] "rolos"
[85] "skittles"
[86] "smarties_american"
[87] "smarties_commonwealth"
[88] "snickers"
[89] "sourpatch_kids_i_e_abominations_of_nature"
[90] "spotted_dick"
[91] "starburst"
[92] "sweet_tarts"
[93] "swedish_fish"
[94] "sweetums_a_friend_to_diabetes"
[95] "tic_tacs"
[96] "those_odd_marshmallow_circus_peanut_things"
[97] "three_musketeers"
[98] "tolberone_something_or_other"
[99] "trail_mix"
[100] "twix"
[101] "vials_of_pure_high_fructose_corn_syrup_for_main_lining_into_your_vein"
[102] "vicodin"
[103] "whatchamacallit_bars"
[104] "white_bread"
[105] "whole_wheat_anything"
[106] "york_peppermint_patties"
[107] "please_list_any_items_not_included_above_that_give_you_joy"
[108] "please_list_any_items_not_included_above_that_give_you_despair"
[109] "please_leave_any_witty_snarky_or_thoughtful_remarks_or_comments_regarding_your_choices"
[110] "guess_the_number_of_mints_in_my_hand"
[111] "betty_or_veronica"
[112] "that_dress_that_went_viral_a_few_years_back_when_i_first_saw_it_it_was"
[113] "what_is_your_favourite_font"
[114] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_jk_rowling"
[115] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_jj_abrams"
[116] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_beyonce"
[117] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_bieber"
[118] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_kevin_bacon"
[119] "please_estimate_the_degree_s_of_separation_you_have_from_the_following_celebrities_francis_bacon_1561_1626"
[120] "which_day_do_you_prefer_friday_or_sunday"
[121] "do_you_eat_apples_the_correct_way_east_to_west_side_to_side_or_do_you_eat_them_like_a_freak_of_nature_south_to_north_bottom_to_top"
[122] "when_you_see_the_above_image_of_the_4_different_websites_which_one_would_you_most_likely_check_out_please_be_honest"
[123] "york_peppermint_patties_ignore"
# 2017
# Standardise the 2017 column names with janitor, then list the cleaned names.
candy_2017_1 <- candy_2017 %>%
  clean_names()
colnames(candy_2017_1)
[1] "internal_id"
[2] "q1_going_out"
[3] "q2_gender"
[4] "q3_age"
[5] "q4_country"
[6] "q5_state_province_county_etc"
[7] "q6_100_grand_bar"
[8] "q6_anonymous_brown_globs_that_come_in_black_and_orange_wrappers_a_k_a_mary_janes"
[9] "q6_any_full_sized_candy_bar"
[10] "q6_black_jacks"
[11] "q6_bonkers_the_candy"
[12] "q6_bonkers_the_board_game"
[13] "q6_bottle_caps"
[14] "q6_boxo_raisins"
[15] "q6_broken_glow_stick"
[16] "q6_butterfinger"
[17] "q6_cadbury_creme_eggs"
[18] "q6_candy_corn"
[19] "q6_candy_that_is_clearly_just_the_stuff_given_out_for_free_at_restaurants"
[20] "q6_caramellos"
[21] "q6_cash_or_other_forms_of_legal_tender"
[22] "q6_chardonnay"
[23] "q6_chick_o_sticks_we_don_t_know_what_that_is"
[24] "q6_chiclets"
[25] "q6_coffee_crisp"
[26] "q6_creepy_religious_comics_chick_tracts"
[27] "q6_dental_paraphenalia"
[28] "q6_dots"
[29] "q6_dove_bars"
[30] "q6_fuzzy_peaches"
[31] "q6_generic_brand_acetaminophen"
[32] "q6_glow_sticks"
[33] "q6_goo_goo_clusters"
[34] "q6_good_n_plenty"
[35] "q6_gum_from_baseball_cards"
[36] "q6_gummy_bears_straight_up"
[37] "q6_hard_candy"
[38] "q6_healthy_fruit"
[39] "q6_heath_bar"
[40] "q6_hersheys_dark_chocolate"
[41] "q6_hershey_s_milk_chocolate"
[42] "q6_hersheys_kisses"
[43] "q6_hugs_actual_physical_hugs"
[44] "q6_jolly_rancher_bad_flavor"
[45] "q6_jolly_ranchers_good_flavor"
[46] "q6_joy_joy_mit_iodine"
[47] "q6_junior_mints"
[48] "q6_senior_mints"
[49] "q6_kale_smoothie"
[50] "q6_kinder_happy_hippo"
[51] "q6_kit_kat"
[52] "q6_laffy_taffy"
[53] "q6_lemon_heads"
[54] "q6_licorice_not_black"
[55] "q6_licorice_yes_black"
[56] "q6_lindt_truffle"
[57] "q6_lollipops"
[58] "q6_mars"
[59] "q6_maynards"
[60] "q6_mike_and_ike"
[61] "q6_milk_duds"
[62] "q6_milky_way"
[63] "q6_regular_m_ms"
[64] "q6_peanut_m_m_s"
[65] "q6_blue_m_ms"
[66] "q6_red_m_ms"
[67] "q6_green_party_m_ms"
[68] "q6_independent_m_ms"
[69] "q6_abstained_from_m_ming"
[70] "q6_minibags_of_chips"
[71] "q6_mint_kisses"
[72] "q6_mint_juleps"
[73] "q6_mr_goodbar"
[74] "q6_necco_wafers"
[75] "q6_nerds"
[76] "q6_nestle_crunch"
[77] "q6_nown_laters"
[78] "q6_peeps"
[79] "q6_pencils"
[80] "q6_pixy_stix"
[81] "q6_real_housewives_of_orange_county_season_9_blue_ray"
[82] "q6_reese_s_peanut_butter_cups"
[83] "q6_reeses_pieces"
[84] "q6_reggie_jackson_bar"
[85] "q6_rolos"
[86] "q6_sandwich_sized_bags_filled_with_boo_berry_crunch"
[87] "q6_skittles"
[88] "q6_smarties_american"
[89] "q6_smarties_commonwealth"
[90] "q6_snickers"
[91] "q6_sourpatch_kids_i_e_abominations_of_nature"
[92] "q6_spotted_dick"
[93] "q6_starburst"
[94] "q6_sweet_tarts"
[95] "q6_swedish_fish"
[96] "q6_sweetums_a_friend_to_diabetes"
[97] "q6_take_5"
[98] "q6_tic_tacs"
[99] "q6_those_odd_marshmallow_circus_peanut_things"
[100] "q6_three_musketeers"
[101] "q6_tolberone_something_or_other"
[102] "q6_trail_mix"
[103] "q6_twix"
[104] "q6_vials_of_pure_high_fructose_corn_syrup_for_main_lining_into_your_vein"
[105] "q6_vicodin"
[106] "q6_whatchamacallit_bars"
[107] "q6_white_bread"
[108] "q6_whole_wheat_anything"
[109] "q6_york_peppermint_patties"
[110] "q7_joy_other"
[111] "q8_despair_other"
[112] "q9_other_comments"
[113] "q10_dress"
[114] "x114"
[115] "q11_day"
[116] "q12_media_daily_dish"
[117] "q12_media_science"
[118] "q12_media_espn"
[119] "q12_media_yahoo"
[120] "click_coordinates_x_y"
# Reshape each year from wide format (one column per candy) to long format:
# one row per respondent-candy pair, with the candy in `candy_name` and the
# response in `rating`.
#
# Candy columns are selected by name rather than by position, so a change in
# column order fails loudly instead of silently pivoting the wrong columns.

# 2015: the candy items are not contiguous. Besides the main run
# (butterfinger ... york_peppermint_patties) two further candy items sit after
# the free-text questions; a purely positional 4:96 range leaves them out.
candy_2015_2 <- candy_2015_1 %>%
  pivot_longer(
    cols = c(
      butterfinger:york_peppermint_patties,
      sea_salt_flavored_stuff_probably_chocolate_since_this_is_the_it_flavor_of_the_year,
      necco_wafers
    ),
    names_to = "candy_name",
    values_to = "rating"
  )

# 2016: all candy items form one contiguous run of columns.
candy_2016_2 <- candy_2016_1 %>%
  pivot_longer(
    cols = x100_grand_bar:york_peppermint_patties,
    names_to = "candy_name",
    values_to = "rating"
  )

# 2017: every candy item (and nothing else) carries the "q6_" prefix.
candy_2017_2 <- candy_2017_1 %>%
  pivot_longer(
    cols = starts_with("q6_"),
    names_to = "candy_name",
    values_to = "rating"
  )
# Keep only the columns needed from each year's long table, then print the
# result for a quick visual check.

# 2015 (the 2015 survey has no country or gender question)
candy_2015_3 <- select(
  candy_2015_2,
  how_old_are_you,
  candy_name,
  rating,
  are_you_going_actually_going_trick_or_treating_yourself
)
candy_2015_3

# 2016
candy_2016_3 <- select(
  candy_2016_2,
  how_old_are_you,
  candy_name,
  rating,
  are_you_going_actually_going_trick_or_treating_yourself,
  which_country_do_you_live_in,
  your_gender
)
candy_2016_3

# 2017
candy_2017_3 <- select(
  candy_2017_2,
  q3_age,
  candy_name,
  rating,
  q1_going_out,
  q4_country,
  q2_gender
)
candy_2017_3
NA
# Give the columns the same short names in all three years so the tables can
# be stacked later. `candy_name` and `rating` already have their final names.
# 2015
candy_2015_4 <- rename(
  candy_2015_3,
  age = how_old_are_you,
  trick_or_treating = are_you_going_actually_going_trick_or_treating_yourself
)
colnames(candy_2015_4)
[1] "age" "candy_name" "rating" "trick_or_treating"
# 2016: map the long survey-question names onto the shared short names.
# `candy_name` and `rating` already have their final names.
candy_2016_4 <- rename(
  candy_2016_3,
  age = how_old_are_you,
  trick_or_treating = are_you_going_actually_going_trick_or_treating_yourself,
  country = which_country_do_you_live_in,
  gender = your_gender
)
colnames(candy_2016_4)
[1] "age" "candy_name" "rating" "trick_or_treating"
[5] "country" "gender"
# 2017: map the numbered question names (q1..q4) onto the shared short names.
# `candy_name` and `rating` already have their final names.
candy_2017_4 <- rename(
  candy_2017_3,
  age = q3_age,
  trick_or_treating = q1_going_out,
  country = q4_country,
  gender = q2_gender
)
colnames(candy_2017_4)
[1] "age" "candy_name" "rating" "trick_or_treating"
[5] "country" "gender"
# Give all three years an identical set of columns before they are combined.
# The 2015 survey has no country or gender question, so those columns are
# added as missing values. NA_character_ is used instead of a bare (logical)
# NA so the column type matches the text answers held in the other years.
# Every table also gets a `year` column recording which survey a row is from.
candy_2015_5 <- candy_2015_4 %>%
  add_column(
    country = NA_character_,
    gender = NA_character_,
    year = "2015"
  ) %>%
  select(age, candy_name, rating, trick_or_treating, country, gender, year)

candy_2016_5 <- candy_2016_4 %>%
  add_column(year = "2016") %>%
  select(age, candy_name, rating, trick_or_treating, country, gender, year)

candy_2017_5 <- candy_2017_4 %>%
  add_column(year = "2017") %>%
  select(age, candy_name, rating, trick_or_treating, country, gender, year)

# Inspect the three aligned tables. The 2016 call views candy_2016_5 (the
# version that has the `year` column), consistent with the other two years.
view(candy_2015_5)
view(candy_2016_5)
view(candy_2017_5)
# Stack the three aligned yearly tables into one long table (rows appended in
# the order 2015, 2016, 2017), then open the result in the data viewer.
candy_combined <- candy_2015_5 %>%
  rbind(candy_2016_5, candy_2017_5)
view(candy_combined)
# Convert the free-text age answers to whole numbers.
#
# Each answer is first parsed as a plain number. Where that fails, the first
# run of digits found in the text is used instead (e.g. "45-55" -> 45,
# "50+" -> 50), so answers with extra characters are no longer discarded.
# Answers containing no digits at all become NA. Values too large to be held
# as an integer are set to NA explicitly before the conversion, so as.integer()
# no longer emits "NAs introduced by coercion" warnings.
candy_combined2 <- candy_combined %>%
  mutate(
    age_text = as.character(age),
    age_number = coalesce(
      # Only the expected "not a number" warning is silenced here; those
      # answers are handled by the digit-extraction fallback below.
      suppressWarnings(as.numeric(age_text)),
      as.numeric(str_extract(age_text, "\\d+"))
    ),
    age_number = if_else(
      abs(age_number) <= .Machine$integer.max,
      age_number,
      NA_real_
    ),
    age = as.integer(age_number)
  ) %>%
  # The explicit select also drops the temporary helper columns.
  select(age, candy_name, rating, trick_or_treating, country, gender, year)
view(candy_combined2)
# Strip the "q6_" question prefix that the 2017 survey puts in front of every
# candy column, so that candy names line up across the three years.
#
# The pattern is anchored with "^" so only a leading "q6_" is removed; an
# unanchored pattern would also alter a name that merely contains "q6_".
# Removing the prefix leaves "100_grand_bar" for 2017, whereas the cleaned
# 2015 and 2016 names use "x100_grand_bar", so that name is mapped back to the
# shared spelling to stop one candy being counted under two names.
library(rebus) # kept as in the original; not needed by the code in this block
pattern <- "^q6_"
candy_combined3 <- candy_combined2 %>%
  mutate(
    candy_name = str_remove(candy_name, pattern),
    candy_name = if_else(
      candy_name == "100_grand_bar",
      "x100_grand_bar",
      candy_name
    )
  ) %>%
  select(age, candy_name, rating, trick_or_treating, country, gender, year)
view(candy_combined3)
# List the distinct candy names to check how well the years now line up.
unique(candy_combined3$candy_name)
[1] "butterfinger"
[2] "x100_grand_bar"
[3] "anonymous_brown_globs_that_come_in_black_and_orange_wrappers"
[4] "any_full_sized_candy_bar"
[5] "black_jacks"
[6] "bonkers"
[7] "bottle_caps"
[8] "box_o_raisins"
[9] "brach_products_not_including_candy_corn"
[10] "bubble_gum"
[11] "cadbury_creme_eggs"
[12] "candy_corn"
[13] "vials_of_pure_high_fructose_corn_syrup_for_main_lining_into_your_vein"
[14] "candy_that_is_clearly_just_the_stuff_given_out_for_free_at_restaurants"
[15] "cash_or_other_forms_of_legal_tender"
[16] "chiclets"
[17] "caramellos"
[18] "snickers"
[19] "dark_chocolate_hershey"
[20] "dental_paraphenalia"
[21] "dots"
[22] "fuzzy_peaches"
[23] "generic_brand_acetaminophen"
[24] "glow_sticks"
[25] "broken_glow_stick"
[26] "goo_goo_clusters"
[27] "good_n_plenty"
[28] "gum_from_baseball_cards"
[29] "gummy_bears_straight_up"
[30] "creepy_religious_comics_chick_tracts"
[31] "healthy_fruit"
[32] "heath_bar"
[33] "hershey_s_kissables"
[34] "hershey_s_milk_chocolate"
[35] "hugs_actual_physical_hugs"
[36] "jolly_rancher_bad_flavor"
[37] "jolly_ranchers_good_flavor"
[38] "kale_smoothie"
[39] "kinder_happy_hippo"
[40] "kit_kat"
[41] "hard_candy"
[42] "lapel_pins"
[43] "lemon_heads"
[44] "licorice"
[45] "licorice_not_black"
[46] "lindt_truffle"
[47] "lollipops"
[48] "mars"
[49] "mary_janes"
[50] "maynards"
[51] "milk_duds"
[52] "laffy_taffy"
[53] "minibags_of_chips"
[54] "joy_joy_mit_iodine"
[55] "reggie_jackson_bar"
[56] "pixy_stix"
[57] "nerds"
[58] "nestle_crunch"
[59] "nown_laters"
[60] "pencils"
[61] "milky_way"
[62] "reese_s_peanut_butter_cups"
[63] "tolberone_something_or_other"
[64] "runts"
[65] "junior_mints"
[66] "senior_mints"
[67] "mint_kisses"
[68] "mint_juleps"
[69] "mint_leaves"
[70] "peanut_m_m_s"
[71] "regular_m_ms"
[72] "mint_m_ms"
[73] "ribbon_candy"
[74] "rolos"
[75] "skittles"
[76] "smarties_american"
[77] "smarties_commonwealth"
[78] "chick_o_sticks_we_don_t_know_what_that_is"
[79] "spotted_dick"
[80] "starburst"
[81] "swedish_fish"
[82] "sweetums"
[83] "those_odd_marshmallow_circus_peanut_things"
[84] "three_musketeers"
[85] "peterson_brand_sidewalk_chalk"
[86] "peanut_butter_bars"
[87] "peanut_butter_jars"
[88] "trail_mix"
[89] "twix"
[90] "vicodin"
[91] "white_bread"
[92] "whole_wheat_anything"
[93] "york_peppermint_patties"
[94] "bonkers_the_candy"
[95] "bonkers_the_board_game"
[96] "boxo_raisins"
[97] "chardonnay"
[98] "coffee_crisp"
[99] "dove_bars"
[100] "hersheys_dark_chocolate"
[101] "hersheys_kisses"
[102] "licorice_yes_black"
[103] "mike_and_ike"
[104] "blue_m_ms"
[105] "red_m_ms"
[106] "third_party_m_ms"
[107] "mr_goodbar"
[108] "necco_wafers"
[109] "peeps"
[110] "person_of_interest_season_3_dvd_box_set_not_including_disc_4_with_hilarious_outtakes"
[111] "reeses_pieces"
[112] "sourpatch_kids_i_e_abominations_of_nature"
[113] "sweet_tarts"
[114] "sweetums_a_friend_to_diabetes"
[115] "tic_tacs"
[116] "whatchamacallit_bars"
[117] "100_grand_bar"
[118] "anonymous_brown_globs_that_come_in_black_and_orange_wrappers_a_k_a_mary_janes"
[119] "green_party_m_ms"
[120] "independent_m_ms"
[121] "abstained_from_m_ming"
[122] "real_housewives_of_orange_county_season_9_blue_ray"
[123] "sandwich_sized_bags_filled_with_boo_berry_crunch"
[124] "take_5"
# List every distinct raw answer in the country column before any cleaning.
candy_combined3 %>%
  pull(country) %>%
  unique()
[1] NA
[2] "Canada"
[3] "usa"
[4] "US"
[5] "USA"
[6] "UK"
[7] "United States of America"
[8] "uSA"
[9] "Japan"
[10] "united states"
[11] "canada"
[12] "United States"
[13] "us"
[14] "france"
[15] "USSA"
[16] "U.S.A."
[17] "A tropical island south of the equator"
[18] "england"
[19] "uk"
[20] "Switzerland"
[21] "Murica"
[22] "United Kingdom"
[23] "Neverland"
[24] "USA!"
[25] "this one"
[26] "USA (I think but it's an election year so who can really tell)"
[27] "Korea"
[28] "51.0"
[29] "Usa"
[30] "U.S."
[31] "Us"
[32] "America"
[33] "Units States"
[34] "belgium"
[35] "croatia"
[36] "United states"
[37] "Portugal"
[38] "England"
[39] "USA USA USA"
[40] "the best one - usa"
[41] "USA! USA! USA!"
[42] "47.0"
[43] "Cascadia"
[44] "españa"
[45] "u.s."
[46] "there isn't one for old men"
[47] "Panama"
[48] "one of the best ones"
[49] "The Yoo Ess of Aaayyyyyy"
[50] "United Kindom"
[51] "France"
[52] "Australia"
[53] "hungary"
[54] "united states of america"
[55] "Austria"
[56] "Somewhere"
[57] "New Zealand"
[58] "54.0"
[59] "Germany"
[60] "Mexico"
[61] "44.0"
[62] "Brasil"
[63] "god's country"
[64] "South Korea"
[65] "USA!!!!!!"
[66] "Philippines"
[67] "EUA"
[68] "USA! USA!"
[69] "45.0"
[70] "sweden"
[71] "United Sates"
[72] "Sub-Canadian North America... 'Merica"
[73] "The Netherlands"
[74] "Finland"
[75] "Trumpistan"
[76] "U.s."
[77] "Merica"
[78] "China"
[79] "germany"
[80] "See above"
[81] "UNited States"
[82] "kenya"
[83] "30.0"
[84] "Netherlands"
[85] "The republic of Cascadia"
[86] "United Stetes"
[87] "america"
[88] "Not the USA or Canada"
[89] "USA USA USA USA"
[90] "United States of America"
[91] "netherlands"
[92] "Denial"
[93] "United State"
[94] "United staes"
[95] "UAE"
[96] "u.s.a."
[97] "USAUSAUSA"
[98] "35"
[99] "finland"
[100] "unhinged states"
[101] "US of A"
[102] "Unites States"
[103] "The United States"
[104] "North Carolina"
[105] "Unied States"
[106] "Europe"
[107] "Earth"
[108] "U S"
[109] "U.K."
[110] "Costa Rica"
[111] "The United States of America"
[112] "unite states"
[113] "46"
[114] "cascadia"
[115] "insanity lately"
[116] "Greece"
[117] "USA? Hard to tell anymore.."
[118] "'merica"
[119] "usas"
[120] "Pittsburgh"
[121] "45"
[122] "32"
[123] "australia"
[124] "A"
[125] "Can"
[126] "Canae"
[127] "New York"
[128] "Ireland"
[129] "California"
[130] "USa"
[131] "South africa"
[132] "I pretend to be from Canada, but I am really from the United States."
[133] "Uk"
[134] "Iceland"
[135] "Canada`"
[136] "Scotland"
[137] "Denmark"
[138] "United Stated"
[139] "Ahem....Amerca"
[140] "UD"
[141] "New Jersey"
[142] "CANADA"
[143] "Indonesia"
[144] "United ststes"
[145] "United Statss"
[146] "endland"
[147] "Atlantis"
[148] "murrika"
[149] "USAA"
[150] "Alaska"
[151] "united States"
[152] "soviet canuckistan"
[153] "N. America"
[154] "Singapore"
[155] "Taiwan"
[156] "hong kong"
[157] "spain"
[158] "Sweden"
[159] "Hong Kong"
[160] "Narnia"
[161] "u s a"
[162] "United Statea"
[163] "united ststes"
[164] "1"
[165] "subscribe to dm4uz3 on youtube"
[166] "United kingdom"
[167] "USA USA USA!!!!"
[168] "I don't know anymore"
[169] "Fear and Loathing"
# Cleaning the country column. Hard coding...
# Code from Bogdan
#
# Every known spelling, typo and joke answer is mapped onto one canonical
# country name through an explicit lookup. Answers that are not listed are
# left unchanged.

# Answers that all mean the United States (including US states and jokes).
us_variants <- c(
  "Fear and Loathing",
  "USA USA USA!!!!",
  "united ststes",
  "United Statea",
  "u s a",
  "united States",
  "Alaska",
  "USAA",
  "murrika",
  "United Statss",
  "United ststes",
  "New Jersey",
  "UD",
  "Ahem....Amerca",
  "United Stated",
  "I pretend to be from Canada, but I am really from the United States.",
  "USa",
  "California",
  "New York",
  "usas",
  "'merica",
  "USA? Hard to tell anymore..",
  "unite states",
  "The United States of America",
  "U S",
  "Unied States",
  "North Carolina",
  "The United States",
  "US of A",
  "unhinged states",
  "USAUSAUSA",
  "u.s.a.",
  "United staes",
  "United State",
  "usa",
  "USA",
  "United States of America",
  "uSA",
  "united states",
  "United States",
  "us",
  "USSA",
  "U.S.A.",
  "Murica",
  "USA!",
  "USA (I think but it's an election year so who can really tell)",
  "Usa",
  "U.S.",
  "Us",
  "America",
  "Units States",
  "United states",
  "USA USA USA",
  "USA! USA! USA!",
  "u.s.",
  "The Yoo Ess of Aaayyyyyy",
  "USA!!!!!!",
  "United Sates",
  "Merica",
  "UNited States",
  "the best one - usa",
  "USA! USA!",
  "america",
  "USA USA USA USA",
  "Unites States",
  "united states of america",
  "Sub-Canadian North America... 'Merica",
  "U.s.",
  "United Stetes"
)

# Answers that all mean the United Kingdom. "England" is listed alongside its
# lower-case and misspelt variants so that it is folded into "UK" as well.
uk_variants <- c(
  "United Kindom",
  "U.K.",
  "Scotland",
  "United kingdom",
  "Uk",
  "United Kingdom",
  "uk",
  "England",
  "england",
  "endland"
)

# Answers that all mean Canada.
canada_variants <- c(
  "CANADA",
  "Canada`",
  "Can",
  "canada",
  "Canae"
)

# One-off capitalisation, spelling and translation fixes (raw = "clean").
other_country_fixes <- c(
  "australia" = "Australia",
  "germany" = "Germany",
  "france" = "France",
  "belgium" = "Belgium",
  "hungary" = "Hungary",
  "sweden" = "Sweden",
  "finland" = "Finland",
  "kenya" = "Kenya",
  "españa" = "Spain",
  "croatia" = "Croatia",
  "Brasil" = "Brazil",
  "hong kong" = "Hong Kong",
  "spain" = "Spain",
  "netherlands" = "Netherlands",
  "The Netherlands" = "Netherlands",
  "South africa" = "South Africa"
)

# Named vector of the form c("raw answer" = "clean name"), spliced into
# recode() below.
country_lookup <- c(
  setNames(rep("US", length(us_variants)), us_variants),
  setNames(rep("UK", length(uk_variants)), uk_variants),
  setNames(rep("Canada", length(canada_variants)), canada_variants),
  other_country_fixes
)

candy_combined4 <- candy_combined3 %>%
  mutate(
    # The raw data holds two answers that both print as
    # "United States of America", so they presumably differ only in
    # whitespace (TODO confirm). Trimming and collapsing whitespace first
    # lets a single lookup key match both.
    country = str_squish(country),
    country = recode(country, !!!country_lookup)
  )
# Check which country answers are still left after the recode step.
candy_combined4 %>%
  pull(country) %>%
  unique()
[1] NA
[2] "Canada"
[3] "US"
[4] "UK"
[5] "Japan"
[6] "France"
[7] "A tropical island south of the equator"
[8] "Switzerland"
[9] "Neverland"
[10] "this one"
[11] "Korea"
[12] "51.0"
[13] "Belgium"
[14] "Croatia"
[15] "Portugal"
[16] "England"
[17] "47.0"
[18] "Cascadia"
[19] "Spain"
[20] "there isn't one for old men"
[21] "Panama"
[22] "one of the best ones"
[23] "Australia"
[24] "Hungary"
[25] "Austria"
[26] "Somewhere"
[27] "New Zealand"
[28] "54.0"
[29] "Germany"
[30] "Mexico"
[31] "44.0"
[32] "Brazil"
[33] "god's country"
[34] "South Korea"
[35] "Philippines"
[36] "EUA"
[37] "45.0"
[38] "Sweden"
[39] "Netherlands"
[40] "Finland"
[41] "Trumpistan"
[42] "China"
[43] "See above"
[44] "Kenya"
[45] "30.0"
[46] "The republic of Cascadia"
[47] "Not the USA or Canada"
[48] "Denial"
[49] "UAE"
[50] "35"
[51] "Europe"
[52] "Earth"
[53] "Costa Rica"
[54] "46"
[55] "cascadia"
[56] "insanity lately"
[57] "Greece"
[58] "Pittsburgh"
[59] "45"
[60] "32"
[61] "A"
[62] "Ireland"
[63] "South africa"
[64] "Iceland"
[65] "Denmark"
[66] "Indonesia"
[67] "Atlantis"
[68] "soviet canuckistan"
[69] "N. America"
[70] "Singapore"
[71] "Taiwan"
[72] "Hong Kong"
[73] "Narnia"
[74] "1"
[75] "subscribe to dm4uz3 on youtube"
[76] "I don't know anymore"
# Answers that are treated as "no usable country": jokes, numbers entered in
# the wrong field, fictional places, and regions or cities that are not
# mapped to a country. They are all set to NA below.
not_a_country <- c(
  "A tropical island south of the equator",
  "Neverland",
  "this one",
  "54.0",
  "Cascadia",
  "46",
  "god's country",
  "45.0",
  "EUA",
  "Not the USA or Canada",
  "Denial",
  "35",
  "cascadia",
  "45",
  "Atlantis",
  "N. America",
  "1",
  "I don't know anymore",
  "subscribe to dm4uz3 on youtube",
  "Narnia",
  "soviet canuckistan",
  "there isn't one for old men",
  "one of the best ones",
  "Somewhere",
  "Trumpistan",
  "30.0",
  "See above",
  "The republic of Cascadia",
  "Earth",
  "Pittsburgh",
  "insanity lately",
  "51.0",
  "47.0",
  "44.0",
  "32",
  "A",
  "Korea",
  "Europe"
)

# One vectorised pass instead of one mutate() per value. %in% never returns
# NA, so countries that are already missing simply stay missing.
candy_combined5 <- candy_combined4 %>%
  mutate(country = replace(country, country %in% not_a_country, NA))
# Final check of the cleaned country values.
candy_combined5 %>%
  pull(country) %>%
  unique()
[1] NA "Canada" "US" "UK"
[5] "Japan" "France" "Switzerland" "Belgium"
[9] "Croatia" "Portugal" "England" "Spain"
[13] "Panama" "Australia" "Hungary" "Austria"
[17] "New Zealand" "Germany" "Mexico" "Brazil"
[21] "South Korea" "Philippines" "Sweden" "Netherlands"
[25] "Finland" "China" "Kenya" "UAE"
[29] "Costa Rica" "Greece" "Ireland" "South africa"
[33] "Iceland" "Denmark" "Indonesia" "Singapore"
[37] "Taiwan" "Hong Kong"
# Open the cleaned data in the viewer, then list the distinct gender answers.
candy_combined5 %>%
  view()
candy_combined5 %>%
  pull(gender) %>%
  unique()
[1] NA "Male" "Female"
[4] "Other" "I'd rather not say"
# List the distinct ages to spot implausible values.
candy_combined5 %>%
  pull(age) %>%
  unique()
[1] 35 41 33 31 30 38 48 39 NA 54
[11] 40 36 47 60 34 44 46 52 37 57
[21] 32 45 58 43 49 64 26 53 27 50
[31] 42 28 13 51 70 19 25 59 61 23
[41] 55 12 21 16 56 22 6 29 24 8
[51] 20 1880 15 71 75 18 17 63 65 62
[61] 69 67 66 74 10 72 200587 14 9 11
[71] 68 99 5 7 77 100 115 123 388 120
[81] 0 108 350 2000 400 85 97 490 78 79
[91] 81 82 142 90 312 88 102 76 1000 73
[101] 1 4
# Looks like there are some dodgy outliers in the age column
library(outliers)

# Boxplot of age as a quick visual check for extreme values.
ggplot(candy_combined5, aes(x = "age", y = age)) +
  geom_boxplot()

# Flag ages whose z-score is more than 3 standard deviations from the mean.
# Rows with a missing age are removed first, because with NA present the
# z-scores would themselves come back as NA. The z-score is computed inside
# mutate() so it always has exactly one value per remaining row, which avoids
# the length mismatch that comes from attaching an external vector after rows
# have been dropped.
age_outliers <- candy_combined5 %>%
  filter(!is.na(age)) %>%
  mutate(
    age_zscore = scores(age, type = "z"),
    is_outlier = abs(age_zscore) > 3
  ) %>%
  filter(is_outlier) %>%
  select(age, age_zscore, is_outlier)
age_outliers
# The z-score check is only exploratory: impossible ages such as 200587
# inflate the mean and standard deviation, so other clearly wrong ages may
# still fall within 3 standard deviations (check age_outliers to confirm).
# I'll take a pragmatic view of the age outliers in this case and remove
# anyone over 120.
# Keep only the analysis columns and the respondents whose recorded age is at
# most 120. filter() discards rows where the condition is NA, so respondents
# with a missing age are removed here as well.
candy_combined7 <- select(
  filter(candy_combined5, age <= 120),
  age, candy_name, rating, trick_or_treating, country, gender, year
)
# Open the final data in the viewer, then confirm which ages remain.
candy_combined7 %>%
  view()
candy_combined7 %>%
  pull(age) %>%
  unique()
[1] 35 41 33 31 30 38 48 39 54 40 36 47 60 34 44 46 52 37
[19] 57 32 45 58 43 49 64 26 53 27 50 42 28 13 51 70 19 25
[37] 59 61 23 55 12 21 16 56 22 6 29 24 8 20 15 71 75 18
[55] 17 63 65 62 69 67 66 74 10 72 14 9 11 68 99 5 7 77
[73] 100 115 120 0 108 85 97 78 79 81 82 90 88 102 76 73 1 4
# Confirm the project root that here() resolves paths against before writing.
here::here()
[1] "/Users/greganderson/codeclan_work/data_cleaning_project/task_4"
# Save the cleaned data set to the project's clean_data folder.
candy_combined7 %>%
  write_csv(here("clean_data/clean_candy.csv"))
```